package mosdi.discovery.objectives;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import mosdi.discovery.ExpectedClumpSizeBoundCalculator;
import mosdi.discovery.ObjectiveFunction;
import mosdi.discovery.ScoreAndPValue;
import mosdi.discovery.SequenceCountBoundCalculator;
import mosdi.discovery.TextModelIupacBounds;
import mosdi.discovery.MotifFinder.SearchState;
import mosdi.fa.Alphabet;
import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.DoublePatternExpectationCalculator;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.ForwardPatternExpectationCalculator;
import mosdi.fa.GeneralizedString;
import mosdi.fa.PatternExpectationCalculator;
import mosdi.index.SuffixTree;
import mosdi.paa.ClumpSizeCalculator;
import mosdi.util.BitArray;
import mosdi.util.Iupac;
import mosdi.util.IupacStringConstraints;
import mosdi.util.SequenceUtils;

public class SequenceCountObjective implements ObjectiveFunction {
	private FiniteMemoryTextModel textModel;
	private int[] sequenceLengths;
	private BitArray[] sequenceOccurrenceAnnotation;
	private SearchState state;
	private int sequenceCount;
	private PatternExpectationCalculator expectationCalculator;
	/** IupacStringConstraints[i] contains constraints for
	 *  of pattern[i..]. */
	private IupacStringConstraints[] remainingConstraints;
	private double[] clumpSizeUpperBounds;
	private TextModelIupacBounds textModelBounds;
	private TextModelIupacBounds backwardTextModelBounds;
	/** Maximal upper bound for clump sizefor which a check is performed. */ 
	private final int maxCheckClumpSizeBounds = 100;
	private double goodEnoughClumpSizeBound;
	private ExpectedClumpSizeBoundCalculator clumpSizeBounder;
	private SequenceCountBoundCalculator pValueBounder;
	private int minSequenceCount;

	public SequenceCountObjective(FiniteMemoryTextModel textModel, int[] sequenceLengths, BitArray[] sequenceOccurrenceAnnotation, boolean useDetailedBounds) {
		this.textModel = textModel;
		this.sequenceLengths = sequenceLengths;
		this.textModelBounds = new TextModelIupacBounds(textModel, useDetailedBounds);
		this.backwardTextModelBounds = null;
		this.clumpSizeBounder = new ExpectedClumpSizeBoundCalculator(textModelBounds);
		this.sequenceOccurrenceAnnotation = sequenceOccurrenceAnnotation;
		this.sequenceCount = sequenceOccurrenceAnnotation[0].size();
		this.state = null;
		this.goodEnoughClumpSizeBound = 1.01;
		this.minSequenceCount = 0;
	}

	public void setGoodEnoughClumpSizeBound(double goodEnoughClumpSizeBound) {
		this.goodEnoughClumpSizeBound = goodEnoughClumpSizeBound;
	}
	
	/** The minimum number of sequences the motif needs to occur in. Motifs with less occurrences
	 *  are skipped. */
	public void setMinSequenceCount(int minSequenceCount) {
		this.minSequenceCount = minSequenceCount;
	}

	@Override
	public String getName() {
		return "sequence-count";
	}

	@Override
	public void initialize(SearchState searchState) {
		this.state = searchState;
		clumpSizeUpperBounds = new double[state.getPattern().length];
		Arrays.fill(clumpSizeUpperBounds, Double.POSITIVE_INFINITY);
		remainingConstraints = new IupacStringConstraints[state.getPattern().length+1];
		remainingConstraints[0] = state.getIupacConstraints();
		if (remainingConstraints[0]==null) throw new IllegalArgumentException("Need IUPAC constraints!");
		if (state.isReverseConsidered()) {
			if (backwardTextModelBounds==null) {
				backwardTextModelBounds = new TextModelIupacBounds(textModel.reverseTextModel(),false);
			}
			expectationCalculator = new DoublePatternExpectationCalculator(textModel, state.getPattern(),textModelBounds,backwardTextModelBounds);
		} else {
			expectationCalculator = new ForwardPatternExpectationCalculator(textModel, state.getPattern(),textModelBounds);	
		}
		double minExpClumpNumberPerChar = Math.pow(textModelBounds.characterClassMinProbabilityNoFuture(0), state.getStringLength())/maxCheckClumpSizeBounds;
		pValueBounder = new SequenceCountBoundCalculator(sequenceLengths, state.getStringLength(), minExpClumpNumberPerChar);
	}

	public int getScore(int[] nodes) {
		BitArray ba = new BitArray(sequenceCount);
		for (int node : nodes) ba.or(sequenceOccurrenceAnnotation[node]);
		return ba.numberOfOnes();
	}

	@Override
	public void updatePattern(int newCharacter, int leftmostChangedPosition) {
		int[] pattern = state.getPattern();
		remainingConstraints[leftmostChangedPosition+1] = remainingConstraints[leftmostChangedPosition].minus(pattern[leftmostChangedPosition]);
		expectationCalculator.setPatternPosition(leftmostChangedPosition, pattern[leftmostChangedPosition]);
		clumpSizeUpperBounds[leftmostChangedPosition] = (leftmostChangedPosition>0)?clumpSizeUpperBounds[leftmostChangedPosition-1]:Double.POSITIVE_INFINITY;
		// check if we already have a useful upper bound 
		if (clumpSizeUpperBounds[leftmostChangedPosition]>goodEnoughClumpSizeBound) {
			// if not: compute bound
			int prefixLength = leftmostChangedPosition+1;
			double newBound = clumpSizeBounder.upperBound(pattern, prefixLength, remainingConstraints[prefixLength], state.isReverseConsidered());
			if (newBound>maxCheckClumpSizeBounds) newBound = Double.POSITIVE_INFINITY;
			clumpSizeUpperBounds[prefixLength-1] = newBound;
		}

	}

	@Override
	public double pValueLowerBound(int prefixLength, int[] nodes) {
		// Step 1) Compute upper bound for the number of matches
		int matches = getScore(nodes);
		if (matches<minSequenceCount) return 1.0;
		if (Double.isInfinite(clumpSizeUpperBounds[prefixLength-1])) return 0.0;
		// Step 2) Compute bound for p-value
		double minExpClumpNumberPerChar = expectationCalculator.getLowerBound(prefixLength, remainingConstraints[prefixLength]) / clumpSizeUpperBounds[prefixLength-1];
		return pValueBounder.getPValueLowerBound(minExpClumpNumberPerChar, matches);
	}

	@Override
	public ScoreAndPValue evaluate(int[] nodes, double pValueThreshold) {
		int[] pattern = state.getPattern();
		int matches = getScore(nodes);
		if (matches<minSequenceCount) return new ScoreAndPValue();
		// Compute expectation
		double singleExpectation = expectationCalculator.getPrefixExpectation(pattern.length);
		// build automaton
		List<GeneralizedString> genStringList = new ArrayList<GeneralizedString>(1);
		String s = Alphabet.getIupacAlphabet().buildString(pattern);
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		// Log.println(Log.Level.DEBUG, "Evaluating candidate: " + s);
		genStringList.add(Iupac.toGeneralizedString(s));
		if (state.isReverseConsidered()) {
			String sReverse = Iupac.reverseComplementary(s);
			if (s.compareTo(sReverse)>0) return new ScoreAndPValue();
			genStringList.add(Iupac.toGeneralizedString(sReverse));
		}
		CDFA cdfa = DFAFactory.build(dnaAlphabet, genStringList, 50000);
		// calculate clump size distribution
		ClumpSizeCalculator csc = new ClumpSizeCalculator(textModel, cdfa, pattern.length);
		double[] clumpSizeDist = csc.clumpSizeDistribution(8, 1e-30);
		// calculate expected clump size
		double expectedClumpSize = 0.0;
		for (int i=1; i<clumpSizeDist.length; ++i) {
			expectedClumpSize+=clumpSizeDist[i]*i;
		}
		double pValue = SequenceUtils.calculateSequenceCountPValue(matches, sequenceLengths, singleExpectation, pattern.length, expectedClumpSize, false);
		if (pValue>0.0) return new ScoreAndPValue(matches, -Math.log(pValue));
		else {
			double logPValue = SequenceUtils.calculateSequenceCountPValue(matches, sequenceLengths, singleExpectation, pattern.length, expectedClumpSize, true);
			return new ScoreAndPValue(matches, -logPValue);
		}
	}

	@Override
	public ScoreAndPValue staticEvaluate(int[] pattern,	boolean considerReverse, SuffixTree suffixTree,	double pValueThreshold) {
		BitArray[] generalizedAlphabet = Iupac.asGeneralizedAlphabet();
		int[] nodes = suffixTree.walk(pattern, generalizedAlphabet);
		int matches = getScore(nodes);

		// Compute expectation
		PatternExpectationCalculator pec = null;
		if (considerReverse) {
			pec = new DoublePatternExpectationCalculator(textModel, pattern, textModelBounds, backwardTextModelBounds);
		} else {
			pec = new ForwardPatternExpectationCalculator(textModel, pattern, textModelBounds);	
		}
		double singleExpectation = pec.getPrefixExpectation(pattern.length);
		// Fast check ... 
		if (pValueThreshold<1.0) {
			ExpectedClumpSizeBoundCalculator b = new ExpectedClumpSizeBoundCalculator(textModelBounds);
			double clumpSizeUpperBound = b.upperBound(pattern, considerReverse);
			if (!Double.isInfinite(clumpSizeUpperBound)) {
				double pValueBound = SequenceUtils.calculateSequenceCountPValue(matches, sequenceLengths, singleExpectation, pattern.length, clumpSizeUpperBound, false);
				if (pValueBound>pValueThreshold) return null;
			}
		}
		// build automaton
		List<GeneralizedString> genStringList = new ArrayList<GeneralizedString>(1);
		String s = Alphabet.getIupacAlphabet().buildString(pattern);
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		// Log.println(Log.Level.DEBUG, "Evaluating candidate: " + s);
		genStringList.add(Iupac.toGeneralizedString(s));
		if (considerReverse) {
			String sReverse = Iupac.reverseComplementary(s);
			if (s.compareTo(sReverse)>0) return new ScoreAndPValue();
			genStringList.add(Iupac.toGeneralizedString(sReverse));
		}
		
		CDFA cdfa = DFAFactory.build(dnaAlphabet, genStringList, 50000);
		// calculate clump size distribution
		ClumpSizeCalculator csc = new ClumpSizeCalculator(textModel, cdfa, pattern.length);
		double[] clumpSizeDist = csc.clumpSizeDistribution(8, 1e-30);
		// calculate expected clump size
		double expectedClumpSize = 0.0;
		for (int i=1; i<clumpSizeDist.length; ++i) {
			expectedClumpSize+=clumpSizeDist[i]*i;
		}
		double pValue = SequenceUtils.calculateSequenceCountPValue(matches, sequenceLengths, singleExpectation, pattern.length, expectedClumpSize, false);
		if (pValue>0.0) return new ScoreAndPValue(matches, -Math.log(pValue));
		else {
			double logPValue = SequenceUtils.calculateSequenceCountPValue(matches, sequenceLengths, singleExpectation, pattern.length, expectedClumpSize, true);
			return new ScoreAndPValue(matches, -logPValue);
		}
	}

}
